#> Loading required package: PerformanceAnalytics
#> Loading required package: xts
#> Loading required package: zoo
#>
#> Attaching package: 'zoo'
#>
#> The following objects are masked from 'package:base':
#>
#> as.Date, as.Date.numeric
#>
#>
#> ######################### Warning from 'xts' package ##########################
#> # #
#> # The dplyr lag() function breaks how base R's lag() function is supposed to #
#> # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or #
#> # source() into this session won't work correctly. #
#> # #
#> # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
#> # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop #
#> # dplyr from breaking base R's lag() function. #
#> # #
#> # Code in packages is not affected. It's protected by R's namespace mechanism #
#> # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning. #
#> # #
#> ###############################################################################
#>
#> Attaching package: 'xts'
#>
#> The following objects are masked from 'package:dplyr':
#>
#> first, last
#>
#>
#> Attaching package: 'PerformanceAnalytics'
#>
#> The following object is masked from 'package:graphics':
#>
#> legend
#>
#> Loading required package: quantmod
#> Loading required package: TTR
#> Registered S3 method overwritten by 'quantmod':
#> method from
#> as.zoo.data.frame zoo
# Packages used by the clustering and plotting steps in this script.
library(broom)
library(umap)
library(dplyr)
library(tidyr)
library(tibble)
library(ggplot2)
library(ggrepel)

##----.read the data----

# Load the S&P 500 price table from a serialized RDS file.
# NOTE(review): the path is absolute and machine-specific; the script only
# runs as-is where this exact directory exists.
sp_500_prices_tbl <- readRDS(
  "C:/Users/mosta/Desktop/Business Decisions with Machine Learning/1/sp_500_prices_tbl.rds"
)

# Auto-print the loaded object for inspection.
sp_500_prices_tbl
# Load the S&P 500 index table from a serialized RDS file.
# NOTE(review): the path is absolute and machine-specific; the script only
# runs as-is where this exact directory exists.
sp_500_index_tbl <- readRDS(
  "C:/Users/mosta/Desktop/Business Decisions with Machine Learning/1/sp_500_index_tbl.rds"
)

# Auto-print the loaded object for inspection.
sp_500_index_tbl
# Fit K-Means with 4 centers on every column of stock_date_matrix_tbl except
# the `symbol` identifier. nstart = 20 tries 20 random initialisations and
# keeps the best one.
# NOTE(review): stock_date_matrix_tbl is not defined in this part of the file;
# it must exist before this statement runs.
kmeans_obj <- stock_date_matrix_tbl %>%
  select(-symbol) %>%
  kmeans(centers = 4, nstart = 20)

# Get the tot.withinss using glance()
glance(kmeans_obj)
##----.step4 (Find the optimal value of K)----

# Fit K-Means on stock_date_matrix_tbl (minus the `symbol` column) for a given
# number of centers.
#
# Arguments:
#   center   Number of cluster centers, passed to kmeans(centers = ).
#   iter_max Maximum number of iterations per start, passed to
#            kmeans(iter.max = ). kmeans() itself defaults to 10, which the
#            recorded run reported as insufficient ("did not converge in 10
#            iterations") when this function was mapped over several values
#            of `center`; a larger default avoids that warning.
#
# Returns:
#   The object returned by kmeans().
#
# NOTE(review): relies on the global stock_date_matrix_tbl, which is not
# defined in this part of the file.
kmeans_mapper <- function(center = 4, iter_max = 100) {
  stock_date_matrix_tbl %>%
    select(-symbol) %>%
    kmeans(centers = center, nstart = 20, iter.max = iter_max)
}

# Quick check of the mapper with 4 centers: one-row model summary via glance().
4 %>%
  kmeans_mapper() %>%
  glance()
#> Warning: There was 1 warning in `mutate()`.
#> ℹ In argument: `k_means = centers %>% map(kmeans_mapper)`.
#> Caused by warning:
#> ! did not converge in 10 iterations
# Scree Plot
# Plots tot.withinss against the number of centers, one labelled point per
# value of `centers`, to help pick K.
# NOTE(review): kmeans_mapped_tbl is not defined in this part of the file; it
# presumably holds a `centers` column and a `glance` list-column of
# broom::glance() results -- confirm against where it is built.
kmeans_mapped_tbl %>%
  unnest(glance) %>%
  select(centers, tot.withinss) %>%
  ggplot(aes(centers, tot.withinss)) +
  geom_point(color = "#2DC6D6", size = 4) +
  # `linewidth` replaces `size` for lines, which ggplot2 3.4.0 deprecated.
  geom_line(color = "#2DC6D6", linewidth = 1) +
  ggrepel::geom_label_repel(
    aes(label = centers),
    color = "#2DC6D6",
    max.overlaps = 30
  ) +
  labs(
    title = "Scree Plot",
    subtitle = "Measures the distance each of the symbols are from the closes K-Means center",
    caption = "Conclusion: Based on the Scree Plot, We can see that the Scree Plot becomes linear (constant rate of change) between 5 and 10 centers for K."
  )
#> Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
#> ℹ Please use `linewidth` instead.